# Purpose: Contains functions that handle loading projected impacts from the
# projection output in `data/2_projection/3_impacts/`. Note that maintaining the
# folder structure within this directory is critical for these functions to
# operate correctly.

# Default input directory for extracted Monte Carlo projection output
MC_INPUT_DEFAULT = glue('{DB}/2_projection/3_impacts/',
    'main_specification/extracted/montecarlo')

# Default input directory for raw single projection output
S_INPUT_DEFAULT = glue('{DB}/2_projection/3_impacts/',
    'main_specification/raw/single/rcp85/CCSM4/low/SSP3')

#' This function returns monte carlo projection results from CSVs generated by
#' `quantiles.py`, which converts the raw projection system output into a mean or
#' quantile of the climate and statstical uncertainty distribution. See
#' `2_projection/README.md` for information regarding the requisite folder
#' structure for Mortality projections.
#' 
#' Inputs
#' ------
#' Directory containing extracted projection outputs and various parameters
#' defining the desires impacts. For example, the main specification output is  
#' located at `data/2_projection/3_impacts/main_specification/extracted/montecarlo/` 
#' which contains sub-directories for each rcp-iam-ssp combination. Within those
#' sub-directories are CSV files for the various age-groups, adaptaion
#' scenarios, spatial resolutions, and units of mortality impacts needed to
#' produce final paper output.  This function accepts each dimension as an input
#' parameter, doing the work of finding your desired output for you.
#' 
#' Outputs
#' -------
#' Dataframe containing projection output consistent with the input parameters.
#' 
#' Parameters/Return
#' -----------------
#' @param ssp SSP scenario (SSP2-4)
#' @param rcp RCP scenario (rcp45, rcp85)
#' @param iam Economic modelling scenario (low, high)
#' @param input_dir Directory containing extracted impacts (note default)
#' @param regions Regions, can be IRs or aggregated regions. Also accepts:
#' - all: all ~25k impact regions; 
#' - iso: country-level output; 
#' - global: global outputs;
#' - US states;
#' @param qtile accepts mean and/or quantiles, e.g., c('mean', 'q05', 'q95').
#' @param scn adaptation scenario (fulladapt, incadapt, noadapt, costs, 
#' fulladaptcosts, incbenefits, climbenefits).
#' @param age agegroup (young, older, oldest, combined)
#' @param year_list List of years to extract between 1981 and 2099.
#' @param units unit of output (rates, levels).
#' @param scale_variable Can be used to scale output, e.g. thousands of deaths.
#' @param adtl Additional columns needed from input CSV. This is primarily
#' useful when we need GCM-monte carlo batch level output for the density plots
#' in Figure 6.
#' @param scale_variable Can be used to scale output, e.g. thousands of deaths.
#' 
#' @return Dataframe containing projected impacts.
get_mortality_impacts = function(
    ssp='SSP3',
    rcp='rcp85',
    iam='low',
    input_dir=MC_INPUT_DEFAULT,
    regions='all',
    qtile='mean',
    scn='fulladaptcosts',
    age='combined',
    year_list=seq(2020,2099),
    units='rates',
    extract='edfcsv',
    scale_variable=1,
    adtl=c(),
    as.DT=FALSE) {

    # Parse inputs to determine list of regions
    region_list = return_region_list(regions)
    resolution_list = check_resolution(region_list)
    scale_func = function(x, scl) (x * scl)

    # Change parameters if GCM-level values are needed.
    if (extract=='valuescsv') {
        qtile = 'value'
        adtl = c(adtl, 'batch', 'gcm', 'weight')
    }

    varlist = c('region', 'year', qtile, adtl)

    # Construct dataframes with relevant impacts at each spatial resolution.
    dflist = list()
    for (resolution in names(resolution_list)) {

        if (units != 'levels' | resolution != 'aggregated') {
            # Extract projection output from CSVs.
            dflist[[resolution]] = memo.csv(glue('{input_dir}/{rcp}/{iam}/{ssp}/',
            '{rcp}-{ssp}-{age}-{scn}-{iam}-{resolution}-{units}-{extract}.csv'))[
                # Subset regions/years.
                year %in% year_list &
                region %in% resolution_list[[resolution]],
                # Select columns.
                ..varlist]

        } else {
            # Manually aggregate levels files.
            aggregates = get_children(resolution_list[['aggregated']])
            for (reg in names(aggregates)) {
                dflist[[reg]] = memo.csv(glue('{input_dir}/{rcp}/{iam}/{ssp}/',
                '{rcp}-{ssp}-{age}-{scn}-{iam}-ir_level-{units}-{extract}.csv'))[
                    # Subset regions/years.
                    year %in% year_list &
                    region %in% aggregates[[reg]],
                    # Select columns.
                    ..varlist][
                        # Sum over deaths.
                        , lapply(.SD, sum), by=year, .SDcols=qtile][
                            # Replace Region.
                            , region := reg]
            }
        }
    }

    df = rbindlist(dflist, use.names=TRUE)[
        , (qtile) := lapply(.SD, scale_func, scl=scale_variable),
        .SDcols=qtile]

    # Non-costs deathrates impacts are output by the projection system in deaths
    # per person. Convert to deaths per 100k.
    if (units=='rates' & scn != 'costs')
        df = df[, (qtile) := lapply(.SD, scale_func, scl=100000),
        .SDcols=qtile]

    # Costs in levels are output in deaths * 100,000 :shrugging-emoji:
    if (units=='levels' & scn == 'costs')
        df = df[, (qtile) := lapply(.SD, scale_func, scl=1/100000),
        .SDcols=qtile]

    # Convert to dataframe or leave as data.table.
    if (!as.DT) df = data.frame(df)

    return(df)

}


#' Returns mortality impacts directly from raw single projection output.
#' 
#' Some diagnostics in the analysis use output from a single climate model, rcp,
#' SSP, IAM combination rather than the entire distribution of climate models and
#' statistical uncertainty draws. This function allows the user to specify the
#' directory of the raw single projection output (i.e., no `quantiles.py`
#' extraction required) and return a dataframe with the desired results.
#'
#' Inputs
#' ------
#' Path to directory containing raw single projection ouput and various parameters
#' defining the desired impacts output. For example, the main specification's
#' single output is `data/2_projection/3_impacts/main_specification/
#' raw/single/rcp85/CCSM4/low/SSP3`, and this function uses that directory by
#' default; however other directories for diagnostic projections, e.g., "linear
#' extrapolation" or "slow adaptation" diagnostics paths can replace the default
#' path to get output from those singles. See the `2_projection/README.md` for
#' details about available diagnostic runs.
#'
#' Outputs
#' -------
#' Dataframe containing projection output consistent with the input parameters.
#'
#' Parameters/Return
#' -----------------
#' @param single_path Directory containing single GCM projection output.
#' @param regions Regions, can be IRs or aggregated regions. Also accepts:
#' - all: all ~25k impact regions; 
#' - iso: country-level output; 
#' - global: global outputs
#' @param scn adaptation scenario (fulladapt, incadapt, noadapt, costs, 
#' fulladaptcosts, incbenefits, climbenefits).
#' @param age agegroup (young, older, oldest, combined)
#' @param year_list List of years to extract between 2020 and 2099.
#' @param units unit of output (rates, levels).
#' @param var Variable to extract from netcdf, e.g. rebased, transformed. See
#' impact-calculations repo for definitions of these variables.
#' @param basename List containing the basenames associated with different model
#' specifications. Only relevant if you're running a non-poly4 model.
#' @param scale_variable Can be used to scale output, e.g. thousands of deaths.
#' 
#' @return Dataframe containing projected impacts.
get_mortality_impacts_single = function(
    single_path=S_INPUT_DEFAULT,
    regions='all',
    scn='fulladapt',
    age='combined',
    year_list=seq(2020, 2099),
    units='rates',
    var='rebased',
    basename=NULL,
    scale_variable=1 ) {

    # Parse inputs to determine list of regions
    region_list = return_region_list(regions)
    resolution_list = check_resolution(region_list)
    scale_func = function(x, scl) (x * scl)

    if (units=='levels')
        units_suffix='-levels'
    else
        units_suffix=''

    addcosts = FALSE
    outvar = 'adapt'
    if (scn=='fulladapt')
        scn_suffix = ''
    else if (scn=='fulladaptcosts'){
        scn_suffix = ''
        addcosts = TRUE
    } else
        scn_suffix = glue('-{scn}')

    if (scn=='costs') {
        var = 'costs_ub'
        outvar = 'costs'
    }

    if (!is.null(basename))
        basename = basename[[single_path]]
    else
        basename = 'Agespec_interaction_GMFD_POLY-4_TINV_CYA_NW_w1'

    # Construct dataframes with relevant impacts at each spatial resolution.
    dflist = list()
    varlist=c('adapt')
    for (resolution in names(resolution_list)) {
        
        if (resolution=='ir_level')
            resolution_suffix=''
        else if (resolution=='aggregated')
            resolution_suffix='-aggregated'

        # Extract scenario impacts.
        scn_df = open_impacts_nc4(
            glue('{single_path}/',
                '{basename}-{age}{scn_suffix}{resolution_suffix}{units_suffix}.nc4'), 
            outvar, var=var) %>%
            dplyr::filter(
                year %in% year_list,
                region %in% resolution_list[[resolution]])

        # Subtract hisclim for relevant adaptation scenarios.
        if (!(scn %in% c('noadapt', 'histclim', 'costs')) & var=='rebased') {
            histclim = open_impacts_nc4(
                glue('{single_path}/',
                    '{basename}-{age}-histclim{resolution_suffix}{units_suffix}.nc4'),
                'histclim', var=var) %>%
                dplyr::filter(year %in% year_list,
                    region %in% resolution_list[[resolution]])

            scn_df = left_join(scn_df, histclim, by=c('region', 'year')) %>%
                dplyr::mutate(impacts = adapt - histclim) %>%
                dplyr::select(region, year, impacts, adapt, histclim)
            varlist=c('impacts', 'adapt', 'histclim')
        }
        
        # Add adaptation costs to projected outputs.
        if (addcosts) {
            costs = open_impacts_nc4(
                glue('{single_path}/',
                    '{basename}-{age}-costs{resolution_suffix}{units_suffix}.nc4'),
                'costs', var='costs_ub') %>%
                dplyr::filter(year %in% year_list,
                    region %in% resolution_list[[resolution]])

            scn_df = left_join(scn_df, costs, by=c('region', 'year')) %>%
                dplyr::mutate(impacts = impacts + costs/100000) %>%
                dplyr::select(region, year, impacts, adapt, histclim, costs)
        }

        dflist[[resolution]] = scn_df 
    }
    
    df = bind_rows(dflist)

    if (scn %in% c('noadapt', 'histclim', 'costs')) {
        df = dplyr::select(df, region, year, impacts=adapt)
        varlist=c('impacts')
    }

    # Non-costs deathrates impacts are output by the projection system in deaths
    # per person. Convert to deaths per 100k.
    if (units=='rates' & scn != 'costs')
        df = df %>% dplyr::mutate_at(varlist, scale_func, scl=100000)

    # Costs in levels are output in deaths * 100,000 :shrugging-emoji:
    if (units=='levels' & scn == 'costs')
        df = df %>% dplyr::mutate(costs = scale_func(costs, scl=1/100000))

    return(df)

}


#' Pulls mortality covariates from raw single projection output.
#'
#' Single projections output some diagnostic files along with standard projected
#' impacts from a single climate model (usually CCSM4). The `mortality-allpreds.csv`
#' file provies covariates used in the projections. For most models, this is a
#' 13-year bartlett kernel average of logged GDP per capita and a 30-year bertlett
#' kernel average of daily average temperature. This function accesses these data
#' and performs a population weighted collapse across regions if an aggregated
#' region is queried.
#'
#' Inputs
#' ------
#' Path to single directory from which the `allpreds.csv` file will be opened.
#' 
#' Outputs
#' -------
#' Dataframe containing covariates consistent with the input parameters
#' 
#' Parameters/Return
#' -----------------
#' @param  single_path Directory containing single GCM projection output.
#' @param units Covariates available in all-preds file; namely `climtas` (long
#' run average temperature') and `loggdppc` (long run average log(GDPpc))
#' @param regions Regions, can be IRs or aggregated regions. Also accepts:
#' - all: all ~25k impact regions; 
#' - iso: country-level output; 
#' - global: global outputs
#' @param year_list List of years to extract between 2020 and 2099.
#'
#' @return Dataframe containing covariates.
get_mortality_covariates = function(
    single_path=S_INPUT_DEFAULT,
    units=c('climtas', 'loggdppc'),
    regions='all',
    year_list=seq(2020,2099),
    as.DT=FALSE,
    scn=NULL,
    ...) {

    # Parse inputs to determine list of regions
    region_list = return_region_list(regions)
    resolution_list = check_resolution(region_list)

    # Open input file
    df = memo.csv(glue('{single_path}/mortality-allpreds.csv')) %>%
        dplyr::filter(
            model=='Agespec_interaction_GMFD_POLY-4_TINV_CYA_NW_w1-young',
            year %in% year_list) %>%
        dplyr::select(all_of(c('region', 'year', units)))

    # Construct dataframes with relevant values at each spatial resolution.
    df_list = list()
    for (resolution in names(resolution_list)) {
        if (resolution=='ir_level') {
            df_list[[resolution]] = dplyr::filter(df,
                region %in% resolution_list[[resolution]])
        } else if (resolution=='aggregated') {
            # Calculate pop-average if aggregated regions are queried.
            pop = get_econvar(
                units=c('pop'),
                regions='all',
                year_list=year_list, ...)
            message('Pop loaded...')
            ag_list = list()
            for (reg in resolution_list[[resolution]]) {
                ag_list[[reg]] = popwt_collapse_to_region(
                    df, units, reg, pop=pop)
            }
            df_list[[resolution]] = bind_rows(ag_list)
        }
    }
    df = bind_rows(df_list)

    if (as.DT) df = data.table(df)

    return(df)
}


#' Converts impacts nc4 into workable dataframe. Mostly used as
#' a helper fucntion for `get_mortality_impacts_single`.
#'
#' Parameters/Return
#' -----------------
#' @param nc4_dir Directory containing single GCM projection output.
#' @param adapt Adaptation scenario.
#' @param var Variable to extract from nc4.
#'
#' @return Dataframe containing extracted impacts.
open_impacts_nc4 = function(nc4_dir, adapt, var='rebased') {

    ncin = nc_open(nc4_dir)
    out_array = ncvar_get(ncin, var)
    dims = c()
    dim_list = list()
    for (i in seq(1, length(ncin$var[[var]][['dim']]))) {
        d = ncin$var[[var]][['dim']][[i]][['vals']]
        dims = c(dims, length(d))
        dim_list[[i]] = d
    }
    dim_list[[1]] = ncvar_get(ncin, 'regions')

    out = array(
        out_array,
        dim=dims,
        dimnames=dim_list)

    df = data.frame(out) %>%
        tibble::rownames_to_column("region") %>%
        tidyr::gather(key='year', value=!!adapt, -region) %>%
        dplyr::mutate(year = as.numeric(substr(year, 2, 5)))

    nc_close(ncin)

    return(df)
}
